* Start from an empty session and (re)open the plain-text log for this run.
clear all
set more off
capture log close		// no-op when no log is currently open
log using "tabdata5-2empcen.log", replace text
* Widest line width so long commands and tables are not wrapped in the log
set linesize 255

*****************************************************
* CENSUS DATA EMPLOYMENT COUNT BY CATEGORY
*****************************************************
/*
PURPOSE: count employment by year x unit (unit_code_common) x industry group for 1995 and 2008.

CHANGES:
- 8/27: new industry definitions.
- 9/3: organize a bit.
*/
*****************************************************
* Date created: 8/15/2013 by Ying Chen
* Date updated: 9/3/2013
*****************************************************
* Paths are relative to the directory this do-file is run from.
* Forward slashes are used because Stata accepts them on every platform
* (Windows included), whereas backslashes only resolve on Windows.
* Files in:
global cns95 "../../data/empcensus/source/cns95.dta"
global cns08 "../../data/empcensus/source/cns08.dta"
global ciccorr "../../data/empcensus/generated/cic_correspondence.dta"
* Files out:
global out2 "../../data/empcensus/generated/cns_emp_count_newind.dta"
*****************************************************

* 1995
* Unit-level employment totals by industry group from the 1995 census file.
* Result: one row per year x unit_code_common, saved to tempfile 95.
use "$cns95", clear
drop _me	// abbreviated name kept exactly as before; presumably a stale _merge -- TODO confirm
gen emp=gongren_he_xuetu_b
gen cs02=cic

* Map industry codes through the correspondence table, keyed on cs02
merge m:1 cs02 using "$ciccorr"
drop if _merge==2			// correspondence rows with no census record
replace cic=cs03 if _merge==3		// matched records take the cs03 code
drop _merge

* 4-, 2- and 3-digit versions of the (string) industry code, then made numeric
gen indCode4=cic
gen indCode2=substr(cic,1,2)
gen indCode3=substr(cic,1,3)
destring indCode*, replace
keep if inrange(indCode2,6,46)

* Industry categories (0/1 indicators; groups overlap by design)
gen food=inrange(indCode2,13,16)
gen trad=(inrange(indCode2,20,24) | indCode2==42)
gen txtlaprl=inrange(indCode2,17,19)
gen plstc=inrange(indCode2,25,30)
gen nonmtl=(indCode2==31)
gen metals=inrange(indCode2,32,34)
gen metal=inlist(indCode2,32,33)
gen metalpdct=(indCode2==34)
gen machinery=inrange(indCode2,35,37)
gen cptNinst=inrange(indCode2,39,41)
gen ht=(inlist(indCode3,368,376,411,412,414,419) | indCode2==40)
* nonht: everything in 35-39 not flagged ht, plus 3-digit codes 413 and 415
gen nonht=((inrange(indCode2,35,39) & ht==0) | inlist(indCode3,413,415))
gen aprl=(indCode2==18)
gen eduspt=(indCode2==24)
gen geq=(indCode2==35)
gen seq=(indCode2==36)
gen trans=(indCode2==37)
gen ptran=inlist(indCode3,371,372,373)
gen eeq=(indCode2==39)
gen comm=(indCode2==40)
gen inst=(indCode2==41)
gen art=(indCode2==42)
gen lghttrad=inlist(indCode2,21,23,24)
gen hvytrad=inlist(indCode2,20,22)
gen plstcNrbbr=inlist(indCode2,29,30)
gen allhvy=(inlist(indCode2,20,22) | inrange(indCode2,13,16) | inrange(indCode2,25,28) | inrange(indCode2,31,33))
gen allmdm=inlist(indCode2,21,23,24,29,30,34)

* count employment for each industry
gen year=1995
local groups food trad txtlaprl plstc nonmtl metals metal metalpdct machinery ///
	cptNinst ht nonht aprl eduspt geq seq trans ptran eeq comm inst art ///
	lghttrad hvytrad plstcNrbbr allhvy allmdm

bysort year unit_code_common: egen unit_emp=total(emp)
foreach ind of local groups {
	* emp*flag is emp inside the group and 0 outside it, so the by-group total
	* is the group's employment on every row of the unit (0 when the unit has
	* no such establishments). total() treats missing emp as 0.
	* This replaces the former conditional egen + gsort + fill-down sequence
	* and gives the same values without re-sorting the data for each group.
	bysort year unit_code_common: egen `ind'_emp=total(emp*`ind')
	}

* All *_emp variables are constant within unit, so any single row represents it
bysort year unit_code_common: keep if _n==1
keep year unit_code_common *_emp
tempfile 95
save `95'



* 2008
* Unit-level employment totals by industry group from the 2008 census file,
* then stacked with the 1995 totals (tempfile 95) and written to $out2.
use indus_2digcode industrycode labor year unit_code_common using "$cns08", clear
destring indus_2digcode, replace
keep if inrange(indus_2digcode,6,46)			// keep only industry //
rename indus_2digcode indCode2
rename industrycode indCode4
rename labor emp
gen indCode3=substr(indCode4,1,3)
* indCode2 is already numeric and already restricted to 6-46 above, so it is
* neither destrung nor filtered a second time here.
destring indCode3 indCode4 emp, replace

* Industry categories (0/1 indicators; groups overlap by design)
gen food=inrange(indCode2,13,16)
gen trad=(inrange(indCode2,20,24) | indCode2==42)
gen txtlaprl=inrange(indCode2,17,19)
gen plstc=inrange(indCode2,25,30)
gen nonmtl=(indCode2==31)
gen metals=inrange(indCode2,32,34)
gen metal=inlist(indCode2,32,33)
gen metalpdct=(indCode2==34)
gen machinery=inrange(indCode2,35,37)
gen cptNinst=inrange(indCode2,39,41)
gen ht=(inlist(indCode3,368,376,411,412,414,419) | indCode2==40)
* nonht: everything in 35-39 not flagged ht, plus 3-digit codes 413 and 415
gen nonht=((inrange(indCode2,35,39) & ht==0) | inlist(indCode3,413,415))
gen aprl=(indCode2==18)
gen eduspt=(indCode2==24)
gen geq=(indCode2==35)
gen seq=(indCode2==36)
gen trans=(indCode2==37)
gen ptran=inlist(indCode3,371,372,373)
gen eeq=(indCode2==39)
gen comm=(indCode2==40)
gen inst=(indCode2==41)
gen art=(indCode2==42)
gen lghttrad=inlist(indCode2,21,23,24)
gen hvytrad=inlist(indCode2,20,22)
gen plstcNrbbr=inlist(indCode2,29,30)
gen allhvy=(inlist(indCode2,20,22) | inrange(indCode2,13,16) | inrange(indCode2,25,28) | inrange(indCode2,31,33))
gen allmdm=inlist(indCode2,21,23,24,29,30,34)

* count employment for each industry
local groups food trad txtlaprl plstc nonmtl metals metal metalpdct machinery ///
	cptNinst ht nonht aprl eduspt geq seq trans ptran eeq comm inst art ///
	lghttrad hvytrad plstcNrbbr allhvy allmdm

bysort year unit_code_common: egen unit_emp=total(emp)
foreach ind of local groups {
	* emp*flag is emp inside the group and 0 outside it, so the by-group total
	* is the group's employment on every row of the unit (0 when the unit has
	* no such establishments). total() treats missing emp as 0.
	* This replaces the former conditional egen + gsort + fill-down sequence
	* and gives the same values without re-sorting the data for each group.
	bysort year unit_code_common: egen `ind'_emp=total(emp*`ind')
	}

* All *_emp variables are constant within unit, so any single row represents it
bysort year unit_code_common: keep if _n==1
keep year unit_code_common *_emp

* Append 95 and 08 data and save
* (the 2008 rows are still in memory, so no intermediate tempfile is needed)
append using `95'
save "$out2", replace

log close
